;; This module provides functionality for reading in CSV files. The assumption
;; is that the CSV file really is a comma-separated values file.

;; Another detail is that leading and trailing whitespace is stripped from the
;; values in the CSV.

;; What do we need?

#|
(define-public DEFAULT-DATA-READER-MAKER
  (make-csv-reader-maker
   '((seperator-chars #\,)
     (strip-leading-whitespace . true)
     (strip-trailing-whitespace . true))))
|#

(define-module (utils csv))

(use-modules
 ;; dsv - delimiter separated values library for reading CSV files
 (dsv)
 (utils list)
 ;; R6RS for assertions
 ((rnrs) #:version (6) #:prefix rnrs:))

;; Maybe these should be put in the data.scm file as abstractions over data
;; structures used to represent data sets, because they deal with data sets
;; (export).

;; =============================
;; LIBRARY INTERFACE ABSTRACTION
;; =============================
(define* (read-dsv-from-file file-path
                             #:optional (delimiter #\,)
                             #:key
                             (format 'unix)
                             (comment-prefix 'default)
                             (encoding "UTF-8"))
  "Open the file at FILE-PATH for input, set the port encoding to ENCODING
and return whatever `dsv->scm` produces for that port.

DELIMITER is an optional positional argument and defaults to the comma
character. FORMAT and COMMENT-PREFIX are handed to `dsv->scm` unchanged as
its keyword arguments of the same name."
  ;; The encoding is set on the freshly opened port before anything is read
  ;; from it, so the parser sees the text decoded with ENCODING.
  (define (parse-from in-port)
    (set-port-encoding! in-port encoding)
    (dsv->scm in-port
              delimiter
              #:format format
              #:comment-prefix comment-prefix))

  (call-with-input-file file-path parse-from))

(export read-dsv-from-file)


(define* (read-dsv-from-string input
                               #:optional (delimiter #\,)
                               #:key
                               (format 'unix)
                               (comment-prefix 'default))
  "Parse the DSV text INPUT with `dsv-string->scm` and return its result.

DELIMITER is an optional positional argument and defaults to the comma
character. FORMAT and COMMENT-PREFIX are handed to `dsv-string->scm`
unchanged as its keyword arguments of the same name."
  (dsv-string->scm input
                   delimiter
                   #:format format
                   #:comment-prefix comment-prefix))

(export read-dsv-from-string)


(define-public write-scm-dsv-to-file
  (lambda* (scm-output
            file-path
            #:optional (delimiter #\,)
            #:key
            (format 'unix)
            (comment-prefix 'default)
            (encoding "UTF-8"))
    "Write SCM-OUTPUT as DSV to the file at FILE-PATH.

SCM-OUTPUT must be a list; this is asserted before the file is touched.
DELIMITER is an optional positional argument and defaults to the comma
character. FORMAT is handed to `scm->dsv`, and ENCODING is set on the output
port before writing. COMMENT-PREFIX is accepted for symmetry with the
reading procedures but is not used when writing."
    ;; For some unknown reason scm->dsv expects everything inside the
    ;; list to be strings already. This means we need to convert to
    ;; strings before giving the data to scm->dsv.

    ;; WARNING: There is no representation for symbols in DSV files, so
    ;; symbols and strings are not distinguished within such a
    ;; file. That means the conversion is lossy for some types of data.

    ;; Validate and convert BEFORE opening the file: opening for output
    ;; creates/truncates the target, so failing afterwards would leave an
    ;; empty file behind and an open port that is never closed explicitly.
    (rnrs:assert (list? scm-output))
    (let ([stringified-output (stringify* scm-output)])
      (call-with-output-file file-path
        (lambda (port)
          (set-port-encoding! port encoding)
          (scm->dsv stringified-output
                    port
                    delimiter
                    #:format format))))))


(define* (write-scm-dsv-to-string scm-output
                                  #:optional (delimiter #\,)
                                  #:key
                                  (format 'unix)
                                  (comment-prefix 'default))
  "Return the result of `scm->dsv-string` applied to the stringified
SCM-OUTPUT.

SCM-OUTPUT is first passed through `stringify*`. DELIMITER is an optional
positional argument and defaults to the comma character; FORMAT is handed to
`scm->dsv-string`. COMMENT-PREFIX is accepted but not used when writing."
  (let ([stringified-output (stringify* scm-output)])
    (scm->dsv-string stringified-output
                     delimiter
                     #:format format)))

(export write-scm-dsv-to-string)


(define-public all-rows
  (lambda* (a-file-path
            #:key
            (converters '()))
    "Read the DSV file at A-FILE-PATH with `read-dsv-from-file` (default
delimiter and options) and return all of its rows as a list of vectors.

CONVERTERS is a list with one entry per leading column; each entry is itself
a list of converters, which is handed together with the cell value to
`apply-multiple`. Columns beyond the length of CONVERTERS are left
unconverted. CONVERTERS must not be longer than the first row.

If the parsed dataset contains no rows, the empty list is returned."

    ;; Convert a single cell using the converters given for its column.
    (define convert-cell
      (lambda (cell cell-converters)
        (apply-multiple cell-converters cell)))

    ;; Walk the cells of a row and the per-column converter lists in
    ;; lockstep. Once the converters run out, the remaining cells are
    ;; returned as they are.
    (define convert-row
      (lambda (row row-converters)
        (let iter-cells ([remaining-cells row]
                         [remaining-converters row-converters])
          (cond
           [(null? remaining-converters) remaining-cells]
           [(null? remaining-cells) '()]
           [else
            ;; convert the current cell and recur with the remaining cells
            (cons (convert-cell (car remaining-cells) (car remaining-converters))
                  (iter-cells (cdr remaining-cells)
                              (cdr remaining-converters)))]))))

    (let ([dataset (read-dsv-from-file a-file-path)])
      (cond
       ;; Nothing was parsed: there is no first row to validate against, so
       ;; answer with "no rows" instead of failing on (car '()).
       [(null? dataset) '()]
       [else
        ;; Only the first row is used to validate the column count.
        (let ([column-count (length (car dataset))])
          (rnrs:assert (>= column-count 1))
          (rnrs:assert (<= (length converters) column-count))

          ;; list of vectors
          (map (lambda (row) (list->vector (convert-row row converters)))
               dataset))]))))
